This project demonstrates a comprehensive single-cell RNA sequencing (scRNA-seq) analysis workflow using the Seurat package in R. The dataset consists of peripheral blood mononuclear cells (PBMCs) from 10x Genomics, containing approximately 2,700 cells.
The workflow includes:
Install required packages if not already installed:
# Install BiocManager
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
# Install Seurat (specific version for compatibility)
install.packages("devtools")
devtools::install_version("Seurat", version = "4.3.0")
# Install other required packages
BiocManager::install(c("org.Hs.eg.db"))
install.packages(c("dplyr", "clustree", "ggplot2", "ggtree"))
library(Seurat)
library(dplyr)
library(clustree)
library(ggplot2)
IMPORTANT: Update the path below to your actual data
directory containing the 10x files: - barcodes.tsv -
genes.tsv - matrix.mtx
# Replace 'PATH-TO-FILES/' with your actual directory path
# Example: "/Users/username/data/pbmc3k/" or "C:/Users/username/data/pbmc3k/"
pbmc.data <- Read10X(data.dir = "C:/Users/Ibrah/Downloads/sc/Data/")
# Create Seurat object with initial QC filters
pbmc <- CreateSeuratObject(counts = pbmc.data,
project = "pbmc3k",
min.cells = 3,
min.features = 200)
pbmc
## An object of class Seurat
## 13714 features across 2700 samples within 1 assay
## Active assay: RNA (13714 features, 0 variable features)
## 2 layers present: counts, data
The dataset contains 13,714 features (genes) across 2,700 samples (cells).
Check for red blood cell contamination:
pbmc[["percent.hb"]] <- PercentageFeatureSet(pbmc, pattern = "^HB[^(P)]")
VlnPlot(pbmc, features = "percent.hb")
pbmc[["percent.mito"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc[["percent.ribo"]] <- PercentageFeatureSet(pbmc, pattern = "^RP[SL]")
VlnPlot(pbmc, features = c("percent.mito", "percent.ribo"), ncol = 2)
FeatureScatter(pbmc, feature1 = "percent.mito", feature2 = "percent.ribo")
VlnPlot(pbmc, features = c("nCount_RNA", "nFeature_RNA"))
Apply QC thresholds: - nCount_RNA: 1,000 - 4,000 - nFeature_RNA: 500 - 1,200 - percent.mito: < 5% - percent.ribo: > 20%
pbmc_filt <- subset(pbmc,
subset = nCount_RNA > 1000 & nCount_RNA < 4000 &
nFeature_RNA > 500 & nFeature_RNA < 1200 &
percent.mito < 5 & percent.ribo > 20)
pbmc_filt <- NormalizeData(pbmc_filt,
normalization.method = "LogNormalize",
scale.factor = 10000)
pbmc_filt <- FindVariableFeatures(pbmc_filt,
selection.method = "vst",
nfeatures = 2000)
# Identify top 10 HVGs
pbmc.top10 <- head(VariableFeatures(pbmc_filt), 10)
# Plot variable features
plot1 <- VariableFeaturePlot(pbmc_filt)
LabelPoints(plot = plot1, points = pbmc.top10, xnudge = 0, ynudge = 0, repel = TRUE)
pbmc_filt <- ScaleData(pbmc_filt, features = rownames(pbmc_filt))
pbmc_filt <- RunPCA(object = pbmc_filt,
features = VariableFeatures(object = pbmc_filt))
PCAPlot(pbmc_filt)
ElbowPlot(pbmc_filt, reduction = "pca", ndims = 50)
Based on the elbow plot, we select 10 PCs for downstream analysis.
Note: This is computationally intensive and may take several minutes.
pbmc_filt <- JackStraw(pbmc_filt, num.replicate = 100)
pbmc_filt <- ScoreJackStraw(pbmc_filt, dims = 1:20)
JackStrawPlot(pbmc_filt, dims = 1:20)
pbmc_filt <- FindNeighbors(object = pbmc_filt, dims = 1:10)
res <- seq.int(0.1, 2.0, 0.1)
for (i in res) {
pbmc_filt <- FindClusters(object = pbmc_filt, resolution = i)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9605
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9295
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8997
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8793
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8600
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8421
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8243
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8067
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7890
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7715
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7544
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7373
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7205
## Number of communities: 9
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7055
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6924
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6814
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6701
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6595
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6514
## Number of communities: 14
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6424
## Number of communities: 14
## Elapsed time: 0 seconds
# Visualize cluster stability
clustree(pbmc_filt@meta.data, prefix = "RNA_snn_res.")
Based on the cluster tree, resolution 0.5 provides stable clusters.
pbmc_filt <- FindClusters(pbmc_filt, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 75081
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8600
## Number of communities: 7
## Elapsed time: 0 seconds
set.seed(1001) # For reproducibility
pbmc_filt <- RunTSNE(pbmc_filt, dims = 1:10)
pbmc_filt <- RunUMAP(object = pbmc_filt, dims = 1:10)
# Plot side by side
DimPlot(pbmc_filt, label = TRUE, reduction = "tsne") +
DimPlot(pbmc_filt, label = TRUE, reduction = "umap")
# Feature plots
FeaturePlot(pbmc_filt,
features = c("CD19", "CD3E", "CD14"),
order = TRUE,
cols = c("lightgrey", "red"),
pt.size = 1.5,
ncol = 3)
# Violin plots - normalized data
VlnPlot(pbmc_filt, features = c("CD19", "CD3E", "CD14"), ncol = 1)
# Violin plots - raw counts
VlnPlot(pbmc_filt, features = c("CD19", "CD3E", "CD14"), ncol = 1, slot = "counts")
Interpretation: - CD19+ → B cells (Cluster 3) - CD3E+ → T cells (Clusters 0, 1, 4, 5) - CD14+ → Monocytes (Clusters 2, 6)
IMPORTANT: Update the path to your reference dataset.
# Load and prepare reference dataset
# Replace with your actual path to reference.rds
reference <- readRDS("C:/Users/Ibrah/Downloads/sc/Data/reference.rds")
reference <- reference %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA(verbose = FALSE) %>%
RunUMAP(dims = 1:10)
DimPlot(reference, group.by = "cell_type", label = TRUE, repel = TRUE, reduction = "umap")
# Transfer cell type annotations
transfer.anchors <- FindTransferAnchors(reference = reference,
query = pbmc_filt,
dims = 1:10)
predictions <- TransferData(anchorset = transfer.anchors,
refdata = reference$cell_type,
dims = 1:10)
pbmc_filt <- AddMetaData(object = pbmc_filt, metadata = predictions)
# Plot predicted cell types
DimPlot(pbmc_filt, group.by = "predicted.id", label = TRUE, reduction = "umap")
ggplot(pbmc_filt@meta.data, aes(x = Idents(pbmc_filt), fill = predicted.id)) +
geom_bar() +
theme_classic() +
xlab("Cluster") +
ylab("Number of Cells")
degs <- FindAllMarkers(pbmc_filt,
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.5)
# Filter by adjusted p-value
degs <- subset(degs, p_val_adj < 0.05)
# Separate upregulated and downregulated DEGs
up_degs <- subset(degs, avg_log2FC > 0.0)
down_degs <- subset(degs, avg_log2FC < 0.0)
DoHeatmap(pbmc_filt,
features = up_degs$gene,
raster = FALSE) +
theme(axis.text.y = element_blank())
library(ggtree)
tree <- BuildClusterTree(pbmc_filt, assay = "RNA")
myphytree <- Tool(object = tree, slot = "BuildClusterTree")
ggtree(myphytree) +
geom_tiplab() +
theme_tree() +
xlim(NA, 500) +
ggtitle("Hierarchical Clustering based on HVGs")
# Load additional libraries
library(clusterProfiler)
library(enrichplot)
library(org.Hs.eg.db)
# Extract upregulated DEGs for each cluster
clust0 <- subset(up_degs, cluster == "0")
up0 <- clust0$gene
clust1 <- subset(up_degs, cluster == "1")
up1 <- clust1$gene
clust2 <- subset(up_degs, cluster == "2")
up2 <- clust2$gene
clust3 <- subset(up_degs, cluster == "3")
up3 <- clust3$gene
clust4 <- subset(up_degs, cluster == "4")
up4 <- clust4$gene
clust5 <- subset(up_degs, cluster == "5")
up5 <- clust5$gene
clust6 <- subset(up_degs, cluster == "6")
up6 <- clust6$gene
# Create list of gene symbols
gene_symbols <- list(up0, up1, up2, up3, up4, up5, up6)
names(gene_symbols) <- c("Cluster0", "Cluster1", "Cluster2", "Cluster3", "Cluster4", "Cluster5", "Cluster6")
# Gene Ontology - Biological Process
bp <- compareCluster(geneCluster = gene_symbols, fun = "enrichGO", OrgDb = "org.Hs.eg.db", ont = "BP", keyType = "SYMBOL")
dotplot(bp, showCategory = 10, font.size = 8.1) + ggtitle("Biological Process (GO)") + theme(plot.title = element_text(hjust = 0.5))
# 11.2 Gene Ontology - Cellular Component
# Gene Ontology - Cellular Component
cc <- compareCluster(geneCluster = gene_symbols, fun = "enrichGO", OrgDb = "org.Hs.eg.db", ont = "CC", keyType = "SYMBOL")
dotplot(cc, showCategory = 10, font.size = 10) + ggtitle("Cellular Component (GO)") + theme(plot.title = element_text(hjust = 0.5))
# 11.3 Gene Ontology - Molecular Function
# Gene Ontology - Molecular Function
mf <- compareCluster(geneCluster = gene_symbols, fun = "enrichGO", OrgDb = "org.Hs.eg.db", ont = "MF", keyType = "SYMBOL")
dotplot(mf, showCategory = 10, font.size = 10) + ggtitle("Molecular Function (GO)") + theme(plot.title = element_text(hjust = 0.5))
# 11.4 KEGG pathway analysis
# Convert gene symbols to Entrez IDs for KEGG analysis
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster0, columns = c("ENTREZID"), keytype = "SYMBOL")
c0 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster1, columns = c("ENTREZID"), keytype = "SYMBOL")
c1 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster2, columns = c("ENTREZID"), keytype = "SYMBOL")
c2 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster3, columns = c("ENTREZID"), keytype = "SYMBOL")
c3 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster4, columns = c("ENTREZID"), keytype = "SYMBOL")
c4 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster5, columns = c("ENTREZID"), keytype = "SYMBOL")
c5 <- na.omit(unique(temp$ENTREZID))
temp <- select(org.Hs.eg.db, keys = gene_symbols$Cluster6, columns = c("ENTREZID"), keytype = "SYMBOL")
c6 <- na.omit(unique(temp$ENTREZID))
# Create list of Entrez IDs
gene_entrez <- list(c0, c1, c2, c3, c4, c5, c6)
names(gene_entrez) <- c("Cluster0", "Cluster1", "Cluster2", "Cluster3", "Cluster4", "Cluster5", "Cluster6")
# KEGG pathway analysis
kegg <- compareCluster(geneCluster = gene_entrez, fun = "enrichKEGG")
dotplot(kegg, showCategory = 10, font.size = 10) + ggtitle("Pathways (KEGG)") + theme(plot.title = element_text(hjust = 0.5))
# SCTransform Workflow
# ======================================================================
# Filter data (same as before)
pbmc_sct <- subset(pbmc, subset = nCount_RNA > 1000 & nCount_RNA < 4000 & nFeature_RNA > 500 & nFeature_RNA < 1200 & percent.mito < 5 & percent.ribo > 20)
# Run SCTransform (replaces normalization, scaling, and variable feature selection)
pbmc_sct <- SCTransform(pbmc_sct, assay = "RNA", variable.features.n = 3000)
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## | | | 0% | |=== | 4% | |====== | 8% | |========= | 12% | |============ | 17% | |=============== | 21% | |================== | 25% | |==================== | 29% | |======================= | 33% | |========================== | 38% | |============================= | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |========================================= | 58% | |============================================ | 62% | |=============================================== | 67% | |================================================== | 71% | |==================================================== | 75% | |======================================================= | 79% | |========================================================== | 83% | |============================================================= | 88% | |================================================================ | 92% | |=================================================================== | 96% | |======================================================================| 100%
## | | | 0% | |=== | 4% | |====== | 8% | |========= | 12% | |============ | 17% | |=============== | 21% | |================== | 25% | |==================== | 29% | |======================= | 33% | |========================== | 38% | |============================= | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |========================================= | 58% | |============================================ | 62% | |=============================================== | 67% | |================================================== | 71% | |==================================================== | 75% | |======================================================= | 79% | |========================================================== | 83% | |============================================================= | 88% | |================================================================ | 92% | |=================================================================== | 96% | |======================================================================| 100%
# PCA
pbmc_sct <- RunPCA(object = pbmc_sct, features = VariableFeatures(object = pbmc_sct))
PCAPlot(pbmc_sct)
ElbowPlot(pbmc_sct, reduction = "pca", ndims = 50)
# Clustering with more PCs (25 instead of 10)
pbmc_sct <- FindNeighbors(object = pbmc_sct, dims = 1:25)
# Test multiple resolutions
res <- seq.int(0.1, 2.0, 0.1)
for (i in res) {
pbmc_sct <- FindClusters(object = pbmc_sct, resolution = i)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9647
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9343
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9060
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8816
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8629
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8466
## Number of communities: 9
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8320
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8173
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8035
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7912
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7788
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7664
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7541
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7418
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7296
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7184
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7086
## Number of communities: 14
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6996
## Number of communities: 14
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6905
## Number of communities: 14
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6816
## Number of communities: 14
## Elapsed time: 0 seconds
clustree(pbmc_sct@meta.data, prefix = "SCT_snn_res.")
# Set resolution
pbmc_sct <- FindClusters(pbmc_sct, resolution = 0.7)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2048
## Number of edges: 86721
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8320
## Number of communities: 10
## Elapsed time: 0 seconds
# UMAP
pbmc_sct <- RunUMAP(object = pbmc_sct, dims = 1:25)
DimPlot(pbmc_sct, label = TRUE)
# 13 Cell type prediction with SCT reference
# Cell type prediction with SCT reference
reference_sct <- reference %>%
SCTransform() %>%
RunPCA(verbose = FALSE) %>%
RunUMAP(dims = 1:25)
## | | | 0% | |================== | 25% | |=================================== | 50% | |==================================================== | 75% | |======================================================================| 100%
## | | | 0% | |== | 4% | |===== | 7% | |======== | 11% | |========== | 14% | |============ | 18% | |=============== | 21% | |================== | 25% | |==================== | 29% | |====================== | 32% | |========================= | 36% | |============================ | 39% | |============================== | 43% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 57% | |========================================== | 61% | |============================================= | 64% | |================================================ | 68% | |================================================== | 71% | |==================================================== | 75% | |======================================================= | 79% | |========================================================== | 82% | |============================================================ | 86% | |============================================================== | 89% | |================================================================= | 93% | |==================================================================== | 96% | |======================================================================| 100%
## | | | 0% | |== | 4% | |===== | 7% | |======== | 11% | |========== | 14% | |============ | 18% | |=============== | 21% | |================== | 25% | |==================== | 29% | |====================== | 32% | |========================= | 36% | |============================ | 39% | |============================== | 43% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 57% | |========================================== | 61% | |============================================= | 64% | |================================================ | 68% | |================================================== | 71% | |==================================================== | 75% | |======================================================= | 79% | |========================================================== | 82% | |============================================================ | 86% | |============================================================== | 89% | |================================================================= | 93% | |==================================================================== | 96% | |======================================================================| 100%
transfer.anchors <- FindTransferAnchors(reference = reference_sct, query = pbmc_sct, dims = 1:25)
predictions <- TransferData(anchorset = transfer.anchors, refdata = reference_sct$cell_type, dims = 1:25)
pbmc_sct <- AddMetaData(object = pbmc_sct, metadata = predictions)
DimPlot(pbmc_sct, group.by = "predicted.id", label = TRUE, reduction = "umap", repel = TRUE) + DimPlot(pbmc_sct, label = TRUE)
# Cell type distribution
ggplot(pbmc_sct@meta.data, aes(x = Idents(pbmc_sct), fill = predicted.id)) +
geom_bar() +
theme_classic() +
xlab("Cluster") +
ylab("Number of Cells")
# Check specific markers
VlnPlot(pbmc_sct, features = c("GZMK", "CCR7"), ncol = 2)
sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8
## [2] LC_CTYPE=English_United Kingdom.utf8
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.utf8
##
## time zone: Europe/Stockholm
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] org.Hs.eg.db_3.18.0 AnnotationDbi_1.64.1 IRanges_2.36.0
## [4] S4Vectors_0.40.2 Biobase_2.62.0 BiocGenerics_0.48.1
## [7] enrichplot_1.22.0 clusterProfiler_4.10.1 ggtree_3.10.1
## [10] clustree_0.5.1 ggraph_2.2.1 ggplot2_3.5.1
## [13] dplyr_1.1.4 SeuratObject_5.0.2 Seurat_4.3.0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.3 later_1.3.2
## [4] bitops_1.0-9 ggplotify_0.1.2 tibble_3.2.1
## [7] R.oo_1.26.0 polyclip_1.10-7 lifecycle_1.0.4
## [10] globals_0.16.3 lattice_0.22-5 MASS_7.3-60.0.1
## [13] backports_1.5.0 magrittr_2.0.3 limma_3.58.1
## [16] plotly_4.10.4 sass_0.4.9 rmarkdown_2.28
## [19] jquerylib_0.1.4 yaml_2.3.10 httpuv_1.6.15
## [22] sctransform_0.4.1 spam_2.11-0 sp_2.1-4
## [25] spatstat.sparse_3.1-0 reticulate_1.39.0 cowplot_1.1.3
## [28] pbapply_1.7-2 DBI_1.2.3 RColorBrewer_1.1-3
## [31] zlibbioc_1.48.2 abind_1.4-8 Rtsne_0.17
## [34] purrr_1.0.2 R.utils_2.12.3 RCurl_1.98-1.16
## [37] yulab.utils_0.1.7 tweenr_2.0.3 GenomeInfoDbData_1.2.11
## [40] ggrepel_0.9.6 irlba_2.3.5.1 listenv_0.9.1
## [43] spatstat.utils_3.1-0 tidytree_0.4.6 goftest_1.2-3
## [46] spatstat.random_3.3-2 fitdistrplus_1.2-1 parallelly_1.38.0
## [49] leiden_0.4.3.1 codetools_0.2-19 DOSE_3.28.2
## [52] ggforce_0.4.2 tidyselect_1.2.1 aplot_0.2.3
## [55] farver_2.1.2 viridis_0.6.5 matrixStats_1.4.1
## [58] spatstat.explore_3.3-3 jsonlite_1.8.9 tidygraph_1.3.1
## [61] progressr_0.14.0 ggridges_0.5.6 survival_3.8-3
## [64] tools_4.3.3 treeio_1.26.0 ica_1.0-3
## [67] Rcpp_1.0.12 glue_1.8.0 gridExtra_2.3
## [70] xfun_0.49 qvalue_2.34.0 GenomeInfoDb_1.38.8
## [73] withr_3.0.2 fastmap_1.2.0 fansi_1.0.6
## [76] digest_0.6.35 R6_2.6.1 mime_0.12
## [79] gridGraphics_0.5-1 colorspace_2.1-1 GO.db_3.18.0
## [82] scattermore_1.2 tensor_1.5 spatstat.data_3.1-2
## [85] RSQLite_2.3.7 R.methodsS3_1.8.2 utf8_1.2.4
## [88] tidyr_1.3.1 generics_0.1.3 data.table_1.16.0
## [91] graphlayouts_1.2.0 httr_1.4.7 htmlwidgets_1.6.4
## [94] scatterpie_0.2.4 uwot_0.2.2 pkgconfig_2.0.3
## [97] gtable_0.3.5 blob_1.2.4 lmtest_0.9-40
## [100] XVector_0.42.0 shadowtext_0.1.4 htmltools_0.5.8.1
## [103] fgsea_1.28.0 dotCall64_1.2 scales_1.3.0
## [106] png_0.1-8 spatstat.univar_3.0-1 ggfun_0.1.6
## [109] knitr_1.48 rstudioapi_0.16.0 reshape2_1.4.4
## [112] checkmate_2.3.2 nlme_3.1-164 cachem_1.1.0
## [115] zoo_1.8-12 stringr_1.5.1 KernSmooth_2.23-22
## [118] HDO.db_0.99.1 parallel_4.3.3 miniUI_0.1.2
## [121] pillar_1.9.0 grid_4.3.3 vctrs_0.6.5
## [124] RANN_2.6.2 promises_1.3.0 xtable_1.8-4
## [127] cluster_2.1.6 evaluate_1.0.5 cli_3.6.2
## [130] compiler_4.3.3 crayon_1.5.3 rlang_1.1.3
## [133] future.apply_1.11.2 labeling_0.4.3 plyr_1.8.9
## [136] fs_1.6.5 stringi_1.8.3 BiocParallel_1.36.0
## [139] viridisLite_0.4.2 deldir_2.0-4 Biostrings_2.70.3
## [142] munsell_0.5.1 lazyeval_0.2.2 spatstat.geom_3.3-3
## [145] GOSemSim_2.28.1 Matrix_1.6-5 patchwork_1.3.0
## [148] bit64_4.5.2 future_1.34.0 KEGGREST_1.42.0
## [151] statmod_1.5.0 shiny_1.9.1 highr_0.11
## [154] ROCR_1.0-11 igraph_2.0.3 memoise_2.0.1
## [157] bslib_0.8.0 fastmatch_1.1-4 bit_4.5.0
## [160] gson_0.1.0 ape_5.8